
*Settings
clear all
cap log close
local user "awcassidy1"
*read everything in as a string. This prevents string/numeric mismatches.
forval y=2000/2019 {
	import delimited "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2\Residential 4-17-19/`y' Res.csv", stringcols(_all) varnames(nonames) clear
	save "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2\Residential 4-17-19/res_`y'.dta", replace
	}
	
*now, append all.

use "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2/Residential 4-17-19/res_2000", clear
forval y=2001/2019 {
	append using "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2/Residential 4-17-19/res_`y'.dta"
	}
save "C:/Users/awcassidy1\Dropbox\jmp_new\ABOR Backup 2/Residential 4-17-19/res_without_header.dta", replace

import delimited "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2/res header.csv", stringcols(_all) varnames(nonames) clear
save "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2/res header.dta", replace 

use "C:/Users/awcassidy1\Dropbox\jmp_new\ABOR Backup 2/Residential 4-17-19/res_without_header.dta", clear

append using "C:\Users\awcassidy1\Dropbox\jmp_new\ABOR Backup 2/res header.dta"

*now make variable names and labels using the header
foreach v of varlist _all {
	local lastrow=_N
	la var `v' "`=`v'[`lastrow']'"
	
	local varname=subinstr("`=`v'[`lastrow']'","/"," ",.)
	local varname=subinstr("`varname'","#","Num",.)
	local varname=subinstr("`varname'"," ","_",.)
	local varname=subinstr("`varname'","9","Nine",.)

	local varname=trim("`varname'")
	local varname= lower("`varname'")
	
	di "`varname'"


	rename `v' `varname'
	}
	
drop in `lastrow'

save "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/res_all.dta", replace

use "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/res_all.dta", clear

egen salesbypid=count(zip), by(pid)


egen latlong=group(latitude longitude)

codebook latlong


egen latlongbypid=count(latlong), by(pid) 


destring mls_number, replace
codebook mls_number

*clean up the pid for travis county.

*make a variable called pidnum that will be the destringed version.
destring pid, gen(pidnum) force

format pidnum %150.0g
*convert back to string

tostring pidnum, replace format("%150.0g") force

replace pidnum="" if pidnum=="."

*now make a stub for the pid

gen pidstub=substr(pidnum,1,6)

replace pidstub=substr(pid,1,6) if missing(pidstub) & county=="Travis"
replace pidstub=substr(pid,1,7) if missing(pidstub) & substr(pid,1,1) =="R"

save "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/res_all_2.dta", replace

*export for smartystreets
use "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/res_all_2.dta", clear
rename address street
rename state_ state
keep street city state zip

duplicates drop

export delimited "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/res_all_2_ss.csv", replace

*now get the audit data and save a newer version of it.
import delimited "C:\Users\awcassidy1\Dropbox\jmp_new\audit data/2009-2013__ECAD_Residential_Energy_Audit_Data.csv", delimiter(comma) bindquote(strict) clear

drop if strpos(address,"Austin")==0

split address, p("Austin")

split address2, p("," "(" ")" "Tx")

rename address23 zip


replace zip=strtrim(zip)

replace zip=subinstr(zip,char(10),"",.)
replace zip=subinstr(zip,char(46)," ",.)
replace zip=subinstr(zip,","," ",.)

rename address24 latitude
rename address25 longitude


replace address1=strtrim(address1)

replace address1=subinstr(address1,char(10),"",.)
replace address1=subinstr(address1,char(46)," ",.)
replace address1=subinstr(address1,","," ",.)



save "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/audit_2.dta", replace

*export for smartystreets

use "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/audit_2.dta", clear

rename address1 street
gen state="TX"
gen city="Austin"
keep street state city zip

duplicates drop

export delimited "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/audit_2_ss.csv", replace


*read in permit data & prep for smartystreets.

import delimited "C:\Users\awcassidy1\Dropbox\jmp_new\permit_data/Issued_Construction_Permits.csv", clear
keep originaladdress1 originalcity originalstate originalzip
rename originaladdress1 street
rename originalcity city
rename originalzip zip
rename originalstate state

duplicates drop

export delimited "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/permits_ss.csv", replace
